Data processing
# Loading all packages
library(Seurat); library(hdf5r); library(tidyverse)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.0 but the current version is
## 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.7.0 but the current
## version is 1.7.1; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten_df() masks hdf5r::flatten_df()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load data
matrix = Read10X_h5("/Users/tranchau/Documents/GFP_2024/Data/S11_WER_GFP/filtered_feature_bc_matrix.h5")
# Create Seurat object
Sample <- CreateSeuratObject(matrix, project = "WER")
VlnPlot(Sample, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

# Quality control
Sample <- subset(Sample, subset = nFeature_RNA > 500 & nFeature_RNA < 8000 & nCount_RNA > 500 & nCount_RNA < 40000 )
VlnPlot(Sample, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

# Normalize data
Sample <- NormalizeData(object = Sample, verbose = FALSE)
# Find highly variable genes
Sample <- FindVariableFeatures(object = Sample, selection.method = "vst", verbose = FALSE)
# Scale data
Sample <- ScaleData(Sample, features = rownames(Sample))
## Centering and scaling data matrix
# Reduce the dimention of the data
Sample <- RunPCA(Sample, features = VariableFeatures(object = Sample), verbose = FALSE)
ElbowPlot(Sample)

# Clustering
Sample <- FindNeighbors(Sample, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
Sample <- FindClusters(Sample, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8572
## Number of edges: 289043
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9109
## Number of communities: 17
## Elapsed time: 0 seconds
# Plot UMAP
Sample <- RunUMAP(Sample, reduction = "pca", dims = 1:30)
## 15:52:03 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:52:03 Read 8572 rows and found 30 numeric columns
## 15:52:03 Using Annoy for neighbor search, n_neighbors = 30
## 15:52:03 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:52:04 Writing NN index file to temp file /var/folders/bc/jp_ttqgx5mjdbhjxqr1w7p4w0000gn/T//Rtmp7PWts9/file1686789415bf
## 15:52:04 Searching Annoy index using 1 thread, search_k = 3000
## 15:52:05 Annoy recall = 100%
## 15:52:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:52:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 15:52:06 Commencing optimization for 500 epochs, with 370538 positive edges
## 15:52:15 Optimization finished
DimPlot(Sample, reduction = "umap", label = TRUE)

#saveRDS(Sample, "/Users/tranchau/Documents/GFP_2024/Data/S11_WER_GFP/Seurat_Obj_WER.rds")
Label for each cluster
Sample_rename <- RenameIdents(object = Sample, '0' = "MZ", '1' = "Cortex", '2' = "Stele", '3'= "Hair", '4'= "Rootcap", '5' = "Endodermis", '6' = "Rootcap", '7'= "MZ", '8' = "Nonhair", '9' = "Rootcap", '10'= "Phloem", '11' = "MZ", '12' = "MZ", '13' = "Xylem", '14' = "Rootcap", '15' = "Rootcap", '16' = "Stele")
# Plot the UMAP with cell type annotation
DimPlot(Sample_rename, reduction = "umap", label = TRUE)

Feature plots
FeaturePlot(Sample, features = "GFP")

FeaturePlot(Sample, features = "gene:AT5G14750")

Dotplot
DotPlot(object = Sample_rename, features = c("gene:AT1G62510", "gene:AT5G53370", "gene:AT4G14010", "gene:AT1G44800", "gene:AT1G64660", "gene:AT2G46890", "gene:AT1G27740", "gene:AT4G34580", "gene:AT1G62980", "gene:AT5G17520", "gene:AT3G52180", "gene:AT3G11550", "gene:AT2G36100", "gene:AT2G14900", "gene:AT5G45210", "gene:AT2G37260", "gene:AT5G40330", "gene:AT1G65310", "gene:AT1G79430", "gene:AT5G50720", "gene:AT1G68810", "gene:AT4G35350", "gene:AT1G71930"), cols = "RdYlBu", col.min= -2, col.max = 2, dot.scale = 4) +
theme(axis.text.x = element_text(size=8, angle = 90, hjust=1),
axis.text.y = element_text(size=10, angle = 0, hjust=1),
axis.title.y = element_text(size=15, angle = 90, vjust=1),
legend.title = element_text(size=10),
legend.text = element_text(size=8)) +
xlab('Gene') +
ylab('Cell type')

DoHeatmap plot
# Find marker genes
Sample_rename_marker <- FindAllMarkers(Sample_rename, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5) %>%
group_by(cluster) %>%
arrange(cluster, desc(avg_log2FC))
## Calculating cluster MZ
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Cortex
## Calculating cluster Stele
## Calculating cluster Hair
## Calculating cluster Rootcap
## Calculating cluster Endodermis
## Calculating cluster Nonhair
## Calculating cluster Phloem
## Calculating cluster Xylem
DoHeatmap(Sample_rename, features = Sample_rename_marker$gene, angle = 120) + NoLegend() + scale_fill_gradientn(colors = c("tomato", "white", "darkgreen"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
